Load packages:
library(tidyverse)
library(labelled)Resources:
num_obs <- 10000
# Generate standard normal distribution (default is mean = 0 and sd = 1)
set.seed(124)
stdnorm_dist <- rnorm(num_obs, mean = 0, sd = 1) # equivalent to rnorm(10)
# Generate normal distribution w/ custom mean and sd
set.seed(124)
norm_dist <- rnorm(num_obs, mean = 50, sd = 5)
# Generate left-skewed distribution
set.seed(124)
lskew_dist <- rbeta(num_obs, 5, 2)
# Generate right-skewed distribution
set.seed(124)
rskew_dist <- rbeta(num_obs, 2, 5)
# Create dataframe
df_generated_pop <- data.frame(stdnorm_dist, norm_dist, lskew_dist, rskew_dist)df_ipeds_pop %>% head()## # A tibble: 6 x 38
## instnm unitid opeid6 opeid control c15basic stabbr city zip locale region tuit_grad_res fee_grad_res tuit_grad_nres fee_grad_nres tuit_md_res fee_md_res tuit_md_nres fee_md_nres tuit_law_res fee_law_res tuit_law_nres fee_law_nres books_supplies roomboard_off oth_expense_off tuitfee_grad_res tuitfee_grad_nres tuitfee_md_res tuitfee_md_nres tuitfee_law_res tuitfee_law_nres coa_grad_res coa_grad_nres coa_md_res coa_md_nres coa_law_res coa_law_nres
## <chr> <dbl> <chr> <chr> <dbl+lbl> <dbl+lbl> <chr+lbl> <chr> <chr> <dbl+lbl> <dbl+lbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Alabama A & M University 100654 001002 00100200 1 [Public] 18 [Master^s Colleges & Universities: Larger Programs] AL [Alabama] Normal 35762 12 [City: Midsize] 5 [Southeast AL AR FL GA KY LA MS NC SC TN VA WV] 10128 1414 20160 1414 NA NA NA NA NA NA NA NA 1600 9240 3090 11542 21574 NA NA NA NA 25472 35504 NA NA NA NA
## 2 University of Alabama at Birmingham 100663 001052 00105200 1 [Public] 15 [Doctoral Universities: Highest Research Activity] AL [Alabama] Birmingham 35294-0110 12 [City: Midsize] 5 [Southeast AL AR FL GA KY LA MS NC SC TN VA WV] 8100 0 19188 0 28978 0 62714 0 NA NA NA NA 1200 12307 5555 8100 19188 28978 62714 NA NA 27162 38250 48040 81776 NA NA
## 3 Amridge University 100690 025034 02503400 2 [Private not-for-profit] 20 [Master^s Colleges & Universities: Small Programs] AL [Alabama] Montgomery 36117-3553 12 [City: Midsize] 5 [Southeast AL AR FL GA KY LA MS NC SC TN VA WV] 11700 1300 11700 1300 NA NA NA NA NA NA NA NA 900 9600 1600 13000 13000 NA NA NA NA 25100 25100 NA NA NA NA
## 4 University of Alabama in Huntsville 100706 001055 00105500 1 [Public] 16 [Doctoral Universities: Higher Research Activity] AL [Alabama] Huntsville 35899 12 [City: Midsize] 5 [Southeast AL AR FL GA KY LA MS NC SC TN VA WV] 10632 826 24430 826 NA NA NA NA NA NA NA NA 2120 10400 3994 11458 25256 NA NA NA NA 27972 41770 NA NA NA NA
## 5 Alabama State University 100724 001005 00100500 1 [Public] 19 [Master^s Colleges & Universities: Medium Programs] AL [Alabama] Montgomery 36104-0271 12 [City: Midsize] 5 [Southeast AL AR FL GA KY LA MS NC SC TN VA WV] 7416 2740 14832 2740 NA NA NA NA NA NA NA NA 1600 7320 4228 10156 17572 NA NA NA NA 23304 30720 NA NA NA NA
## 6 The University of Alabama 100751 001051 00105100 1 [Public] 16 [Doctoral Universities: Higher Research Activity] AL [Alabama] Tuscaloosa 35487-0100 12 [City: Midsize] 5 [Southeast AL AR FL GA KY LA MS NC SC TN VA WV] 10780 0 30250 0 28978 0 62714 0 23610 0 43060 0 1000 13636 4600 10780 30250 28978 62714 23610 43060 30016 49486 48214 81950 42846 62296
df_generated_pop %>% head()## stdnorm_dist norm_dist lskew_dist rskew_dist
## 1 -1.38507062 43.07465 0.9172827 0.08271725
## 2 0.03832318 50.19162 0.7064826 0.29351743
## 3 -0.76303016 46.18485 0.8444117 0.15558827
## 4 0.21230614 51.06153 0.6694614 0.33053865
## 5 1.42553797 57.12769 0.3488487 0.65115131
## 6 0.74447982 53.72240 0.5401472 0.45985283
# Function to get sampling distribution (default: 1000 samples of size 200)
get_sampling_distribution <- function(data_vec, num_samples = 1000, sample_size = 200) {
sample_means <- vector(mode = 'numeric', num_samples)
for (i in 1:length(sample_means)) {
samp <- sample(data_vec, sample_size)
sample_means[[i]] <- mean(samp, na.rm = T)
}
sample_means
}
# Function to generate plot
plot_distribution <- function(data_vec1, data_vec2 = NULL, data_df = NULL, data_var = NULL, group_var = NULL, group_cat = NULL, pop_labels = NULL, show_group_hist = F, sampling_dist = F, plot_title = '') {
two_pop <- !is.null(group_var) || !is.null(data_vec2)
two_dist <- two_pop && !sampling_dist
# Prep dataframe
if (!is.null(data_df)) {
data_df[[data_var]] <- unclass(data_df[[data_var]]) # unclass haven_labelled
if (two_pop) {
data_df[[group_var]] <- unclass(data_df[[group_var]])
}
data_df <- data_df %>% filter(!is.na(get(data_var))) # remove NA rows
# If group_cat not provided, use 2 values from group_var
if (two_pop && is.null(group_cat)) {
group_cat <- sort(unique(na.omit(data_df[[group_var]])))[1:2]
}
# Create population vector(s)
if (!two_pop) { # single population
data_vec1 <- data_df[[data_var]]
} else { # two populations
data_vec1 <- (data_df %>% filter(get(group_var) == group_cat[[1]]))[[data_var]]
data_vec2 <- (data_df %>% filter(get(group_var) == group_cat[[2]]))[[data_var]]
}
}
# Get sampling distribution
if (sampling_dist) {
if (!two_pop) { # single population
data_vec1 <- get_sampling_distribution(data_vec1)
} else { # two populations
data_vec1_samp <- get_sampling_distribution(data_vec1)
data_vec2_samp <- get_sampling_distribution(data_vec2)
data_vec1 <- data_vec1_samp - data_vec2_samp
}
}
# Legend text
legend_text <- c(paste('Mean:', round(mean(data_vec1), 2),
'\nStd Dev:', round(sd(data_vec1), 2)),
paste('Median:', round(median(data_vec1), 2)))
if (two_dist) {
legend_text <- c(legend_text,
paste('Mean:', round(mean(data_vec2), 2),
'\nStd Dev:', round(sd(data_vec2), 2)),
paste('Median:', round(median(data_vec2), 2)))
}
if (!is.null(pop_labels)) {
pop1 <- pop_labels[[1]]
pop2 <- pop_labels[[2]]
} else if (!is.null(group_var)) {
pop1 <- str_c(group_var, '=', group_cat[[1]])
pop2 <- str_c(group_var, '=', group_cat[[2]])
} else {
pop1 <- 'Pop1'
pop2 <- 'Pop2'
}
# Create statistics dataframe
if (!two_dist) {
lines_vec <- c('dotted')
stats_vec <- c(mean(data_vec1), median(data_vec1))
legend_title <- 'Statistics'
if (two_pop && sampling_dist) {
legend_title <- str_c('Statistics\n(', pop1, ' - ', pop2, ')')
}
} else {
lines_vec <- c('dotted', 'dotdash')
stats_vec <- c(mean(data_vec1), median(data_vec1), mean(data_vec2), median(data_vec2))
legend_title <- str_c('Statistics\n(', pop1, ' vs. ', pop2, ')')
}
stats_df <- data.frame(
pop = rep(lines_vec, each = 2),
stat = rep(c('blue', 'red'), times = if_else(two_dist, 2, 1)),
val = stats_vec
)
stats_df$pop <- factor(stats_df$pop, levels = c('dotted', 'dotdash'))
# Plot distribution(s)
p <- ggplot() +
ggtitle(plot_title) + xlab('') + ylab('') +
geom_density(aes(x = data_vec1), alpha = 0.8)
if (!two_dist || show_group_hist) { # show histogram only if 1 pop or show_group_hist is TRUE
p <- p +
geom_histogram(aes(x = data_vec1, y = ..density..), alpha = 0.4, position = 'identity')
}
if (two_dist) {
p <- p +
geom_density(aes(x = data_vec2), alpha = 0.8)
if (show_group_hist) { # show histogram only if show_group_hist is TRUE
p <- p +
geom_histogram(aes(x = data_vec2, y = ..density..), alpha = 0.4, position = 'identity', fill = 'wheat4')
}
}
p <- p +
geom_vline(data = stats_df,
aes(xintercept = val, color = interaction(stat, pop), linetype = interaction(stat, pop)),
size = 0.6, alpha = 0.8) +
scale_color_manual(name = legend_title,
labels = legend_text,
values = as.character(stats_df$stat)) +
scale_linetype_manual(name = legend_title,
labels = legend_text,
values = as.character(stats_df$pop)) +
theme(plot.title = element_text(size = 10, face = 'bold', hjust = 0.5),
legend.title = element_text(size = 9, face = 'bold'),
legend.text = element_text(size = 8)) +
guides(col = guide_legend(ncol = if_else(two_dist, 2, 1)))
p
}# IPEDS data: tuitfee_grad_res
plot_distribution(df_ipeds_pop$tuitfee_grad_res)# IPEDS data: tuitfee_grad_res
plot_distribution(data_df = df_ipeds_pop, data_var = 'tuitfee_grad_res')# IPEDS data: tuitfee_grad_res by control (default: use first 2 categorical values in group_var)
plot_distribution(data_df = df_ipeds_pop, data_var = 'tuitfee_grad_res', group_var = 'control')# IPEDS data: tuitfee_grad_res by control, shade histogram
plot_distribution(data_df = df_ipeds_pop, data_var = 'tuitfee_grad_res', group_var = 'control',
show_group_hist = T)# IPEDS data: tuitfee_grad_res by control, custom order
plot_distribution(data_df = df_ipeds_pop, data_var = 'tuitfee_grad_res', group_var = 'control',
group_cat = c(2, 1))# IPEDS data: tuitfee_grad_res by control, custom categories
plot_distribution(data_df = df_ipeds_pop, data_var = 'tuitfee_grad_res', group_var = 'control',
group_cat = c(2, 3))# Alternatively, prepare 2 populations first to pass in
pop1 <- (df_ipeds_pop %>% filter(unclass(control) == 1))$tuitfee_grad_res
pop2 <- (df_ipeds_pop %>% filter(unclass(control) == 2))$tuitfee_grad_res
plot_distribution(pop1, pop2)# Custom labels
plot_distribution(pop1, pop2, pop_labels = c('Public', 'Private not-for-profit'))# Take single random sample of size 200
set.seed(124)
df_generated_sample <- df_generated_pop[sample(nrow(df_generated_pop), 200), ]
set.seed(124)
df_ipeds_sample <- df_ipeds_pop[sample(nrow(df_ipeds_pop), 200), ]
# Randomly generated data: standard normal
plot_distribution(df_generated_sample$stdnorm_dist)# IPEDS data: tuitfee_grad_res
plot_distribution(df_ipeds_pop$tuitfee_grad_res, sampling_dist = T)# IPEDS data: tuitfee_grad_res
plot_distribution(data_df = df_ipeds_pop, data_var = 'tuitfee_grad_res', sampling_dist = T)# IPEDS data: tuitfee_grad_res by control (control=1 minus control=2)
plot_distribution(data_df = df_ipeds_pop, data_var = 'tuitfee_grad_res', group_var = 'control',
sampling_dist = T, pop_labels = c('Public', 'Private'))# IPEDS data: tuitfee_grad_res by control (control=2 minus control=1)
plot_distribution(data_df = df_ipeds_pop, data_var = 'tuitfee_grad_res', group_var = 'control',
group_cat = c(2, 1), sampling_dist = T, pop_labels = c('Private', 'Public'))# Function to generate t-distribution plot
plot_t_distribution <- function(data_vec1, data_vec2 = NULL, data_df = NULL, data_var = NULL, group_var = NULL, group_cat = NULL, mu = 0, alpha = 0.05, alternative = 'two.sided', plot_title = '', shade_rejection = T, shade_pval = T, stacked = F) {
two_pop <- !is.null(group_var) || !is.null(data_vec2)
# Prep dataframe
if (!is.null(data_df)) {
data_df[[data_var]] <- unclass(data_df[[data_var]]) # unclass haven_labelled
if (two_pop) {
data_df[[group_var]] <- unclass(data_df[[group_var]])
}
data_df <- data_df %>% filter(!is.na(get(data_var))) # remove NA rows
# If group_cat not provided, use 2 values from group_var
if (two_pop && is.null(group_cat)) {
group_cat <- sort(unique(na.omit(data_df[[group_var]])))[1:2]
}
# Create samples
if (!two_pop) { # single sample
data_vec1 <- data_df[[data_var]]
} else { # two samples
data_vec1 <- (data_df %>% filter(get(group_var) == group_cat[[1]]))[[data_var]]
data_vec2 <- (data_df %>% filter(get(group_var) == group_cat[[2]]))[[data_var]]
}
}
# Calculate stats
if (!two_pop) { # single sample
data_vec1 <- na.omit(data_vec1)
# Calculate t-statistics
sample_size <- length(data_vec1)
deg_freedom <- sample_size - 1
xbar <- mean(data_vec1)
s <- sd(data_vec1)
std_err <- s / sqrt(sample_size)
t <- (xbar - mu) / std_err
} else { # two samples
data_vec1 <- na.omit(data_vec1)
data_vec2 <- na.omit(data_vec2)
# Calculate t-statistics
xbar1 <- mean(data_vec1)
xbar2 <- mean(data_vec2)
s1 <- sd(data_vec1)
s2 <- sd(data_vec2)
n1 <- length(data_vec1)
n2 <- length(data_vec2)
deg_freedom <- (s1**2/n1 + s2**2/n2)**2 / ((s1**2/n1)**2/(n1-1) + (s2**2/n2)**2/(n2-1))
std_err <- sqrt(s1**2/n1 + s2**2/n2)
t <- (xbar1 - xbar2) / std_err
}
# Calculate critical value and p-value
if (alternative == 'less') { # left-tailed
cv_lower <- qt(p = alpha, df = deg_freedom, lower.tail = T)
cv_legend <- round(cv_lower, 2)
cv_legend2 <- round(cv_lower * std_err + mu, 2)
pval <- round(pt(q = t, df = deg_freedom, lower.tail = T), 4)
} else if (alternative == 'greater') { # right-tailed
cv_upper <- qt(p = alpha, df = deg_freedom, lower.tail = F)
cv_legend <- round(cv_upper, 2)
cv_legend2 <- round(cv_upper * std_err + mu, 2)
pval <- round(pt(q = t, df = deg_freedom, lower.tail = F), 4)
} else { # two-tailed
cv_lower <- qt(p = alpha / 2, df = deg_freedom, lower.tail = T)
cv_upper <- qt(p = alpha / 2, df = deg_freedom, lower.tail = F)
cv_legend <- str_c('\u00B1', round(cv_upper, 2))
cv_legend2 <- str_c(round(cv_lower * std_err + mu, 2), ' & ', round(cv_upper * std_err + mu, 2))
pval_half <- round(pt(q = t, df = deg_freedom, lower.tail = t < 0), 4)
pval <- str_c(pval_half, ' + ', pval_half, ' = ', 2 * pval_half)
}
# Plot t-distribution
p <- ggplot(data.frame(x = -c(-4, 4)), aes(x)) +
ggtitle(plot_title) + xlab('') + ylab('') +
stat_function(fun = dt, args = list(df = deg_freedom), xlim = c(-4, 4))
# Shade rejection region using critical value
if (alternative != 'greater') {
p <- p + geom_vline(aes(xintercept = cv_lower, color = 'cval'),
linetype = 'dotted', size = 0.8, alpha = 0.8)
if (shade_rejection) {
p <- p + stat_function(fun = dt, args = list(df = deg_freedom),
xlim = c(-4, cv_lower),
geom = 'area', alpha = 0.3, fill = 'red')
}
if (shade_pval) {
p <- p + stat_function(fun = dt, args = list(df = deg_freedom),
xlim = c(-4, if_else(alternative == 'two.sided', -abs(t), t)),
geom = 'area', alpha = 0.3, fill = 'blue')
}
}
if (alternative != 'less') {
p <- p + geom_vline(aes(xintercept = cv_upper, color = 'cval'),
linetype = 'dotted', size = 0.8, alpha = 0.8)
if (shade_rejection) {
p <- p + stat_function(fun = dt, args = list(df = deg_freedom),
xlim = c(cv_upper, 4),
geom = 'area', alpha = 0.3, fill = 'red')
}
if (shade_pval) {
p <- p + stat_function(fun = dt, args = list(df = deg_freedom),
xlim = c(if_else(alternative == 'two.sided', abs(t), t), 4),
geom = 'area', alpha = 0.3, fill = 'blue')
}
}
# Legend text
legend_text <- c('t-statistics / p-value', 'critical value / alpha')
if (stacked) {
legend_text <- c(str_c('t-statistics: ', round(t, 2),
'\n(p-value: ', str_extract(pval, '[\\d.-]+$'), ')'),
str_c('Critical value: ', cv_legend,
'\n(alpha: ', round(alpha, 2), ')'))
}
stats_text <- c(str_c('t-statistics: ', round(t, 2)),
str_c('SE: ', round(std_err, 2)),
str_c('p-value: ', pval),
str_c('Critical value: ', cv_legend),
str_c('alpha: ', round(alpha, 2)))
if (!stacked) {
p <- p +
annotate('text', size = 9*5/14, x = 4.84, y = 0.14, hjust = 0,
label = 'bold(Statistics)', parse = T) +
annotate('text', size = 8*5/14, x = 4.89, y = 0:4 * -0.015 + 0.12, hjust = 0,
label = stats_text)
}
# Label plot
p <- p +
geom_vline(aes(xintercept = t, color = 'tstat'),
linetype = 'dotted', size = 0.8, alpha = 0.8) +
scale_x_continuous(sec.axis = sec_axis(trans = ~ . * std_err + mu)) +
scale_color_manual(name = if_else(stacked, 'Statistics', 'Legend'),
breaks = c('tstat', 'cval'),
labels = legend_text,
values = c(tstat = 'blue', cval = 'red')) +
theme(plot.title = element_text(size = 10, face = 'bold', hjust = 0.5),
plot.margin = unit(c(5.5, if_else(stacked, 5.5, 30), 5.5, 5.5), 'pt'),
legend.title = element_text(size = 9, face = 'bold'),
legend.text = element_text(size = 8)) +
coord_cartesian(xlim = c(-4, 4),
clip = 'off')
p
}# H0: population mean of coa_grad_res is $29,000
# Ha: population mean of coa_grad_res is NOT $29,000 (two-sided test)
# Verdict: Fail to reject H0
t.test(df_ipeds_sample$coa_grad_res, mu = 29000)##
## One Sample t-test
##
## data: df_ipeds_sample$coa_grad_res
## t = 1.2378, df = 199, p-value = 0.2173
## alternative hypothesis: true mean is not equal to 29000
## 95 percent confidence interval:
## 28405.20 31600.29
## sample estimates:
## mean of x
## 30002.74
plot_t_distribution(df_ipeds_sample$coa_grad_res, mu = 29000)# H0: population mean of coa_grad_res is $32,000
# Ha: population mean of coa_grad_res is NOT $32,000 (two-sided test)
# Verdict: Reject H0
t.test(df_ipeds_sample$coa_grad_res, mu = 32000)##
## One Sample t-test
##
## data: df_ipeds_sample$coa_grad_res
## t = -2.4653, df = 199, p-value = 0.01454
## alternative hypothesis: true mean is not equal to 32000
## 95 percent confidence interval:
## 28405.20 31600.29
## sample estimates:
## mean of x
## 30002.74
plot_t_distribution(data_df = df_ipeds_sample, data_var = 'coa_grad_res', mu = 32000)# H0: population mean of coa_grad_res is $33,000
# Ha: population mean of coa_grad_res is LESS THAN $33,000 (left-sided test)
# Verdict: Reject H0
t.test(df_ipeds_sample$coa_grad_res, mu = 33000, alternative = 'less')##
## One Sample t-test
##
## data: df_ipeds_sample$coa_grad_res
## t = -3.6997, df = 199, p-value = 0.0001396
## alternative hypothesis: true mean is less than 33000
## 95 percent confidence interval:
## -Inf 31341.53
## sample estimates:
## mean of x
## 30002.74
plot_t_distribution(df_ipeds_sample$coa_grad_res, mu = 33000, alternative = 'less')# H0: population mean of coa_grad_res is $31,000
# Ha: population mean of coa_grad_res is GREATER THAN $31,000 (right-sided test)
# Verdict: Fail to reject H0
t.test(df_ipeds_sample$coa_grad_res, mu = 31000, alternative = 'greater')##
## One Sample t-test
##
## data: df_ipeds_sample$coa_grad_res
## t = -1.231, df = 199, p-value = 0.8901
## alternative hypothesis: true mean is greater than 31000
## 95 percent confidence interval:
## 28663.96 Inf
## sample estimates:
## mean of x
## 30002.74
plot_t_distribution(df_ipeds_sample$coa_grad_res, mu = 31000, alternative = 'greater')# H0: population mean of tuitfee_grad_res is equal for control=1 and control=2
# Ha: population means are NOT equal
# Verdict: Reject H0
t.test(formula = tuitfee_grad_res ~ control, data = df_ipeds_sample,
subset = unclass(control) %in% c(1, 2))##
## Welch Two Sample t-test
##
## data: tuitfee_grad_res by control
## t = -6.7041, df = 116.65, p-value = 0.0000000007599
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -11132.95 -6055.28
## sample estimates:
## mean in group 1 mean in group 2
## 10227.46 18821.58
# Here, the t-statistics line is off the grid
plot_t_distribution(data_df = df_ipeds_sample, data_var = 'tuitfee_grad_res',
group_var = 'control', group_cat = c(1, 2))# Alternatively, prepare 2 samples first to pass in
sample1 <- (df_ipeds_sample %>% filter(unclass(control) == 1))$tuitfee_grad_res
sample2 <- (df_ipeds_sample %>% filter(unclass(control) == 2))$tuitfee_grad_res
t.test(sample1, sample2)##
## Welch Two Sample t-test
##
## data: sample1 and sample2
## t = -6.7041, df = 116.65, p-value = 0.0000000007599
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -11132.95 -6055.28
## sample estimates:
## mean of x mean of y
## 10227.46 18821.58
plot_t_distribution(sample1, sample2)library(patchwork)
# Use / to place plots 1 per row (stacked)
plot_distribution(df_generated_pop$norm_dist, plot_title = 'Population distribution') /
plot_distribution(df_generated_sample$norm_dist, plot_title = 'Single sample distribution') /
plot_distribution(df_generated_pop$norm_dist, sampling_dist = T,
plot_title = 'Sampling distribution')# Or use + and plot_layout()
plot_distribution(data_df = df_ipeds_pop, data_var = 'tuitfee_grad_res', group_var = 'control',
group_cat = c(2, 1), plot_title = 'Population distributions') +
plot_distribution(data_df = df_ipeds_sample, data_var = 'tuitfee_grad_res', group_var = 'control',
group_cat = c(2, 1), plot_title = 'Single sample distributions') +
plot_distribution(data_df = df_ipeds_pop, data_var = 'tuitfee_grad_res', group_var = 'control',
group_cat = c(2, 1), sampling_dist = T,
plot_title = 'Sampling distribution differences') +
plot_layout(ncol = 1)# When including t-distribution plot, need to specify stacked = T
plot_distribution(df_ipeds_sample$coa_grad_res, plot_title = 'Population distribution') +
plot_distribution(df_ipeds_sample$coa_grad_res, plot_title = 'Single sample distribution') +
plot_t_distribution(df_ipeds_sample$coa_grad_res, mu = 29000, stacked = T,
plot_title = 'Sampling distribution under null') +
plot_layout(ncol = 1)